#pca plots
pca.ctr[pca.ctr$Line %in% 'MSN38',]$Line <- 'MSN-38'
pca.ctr[pca.ctr$Line %in% 'MSN9',]$Line <- 'MSN-9'
pca.ctr[pca.ctr$Line %in% 'WTC11',]$Line <- 'WTC-11'
pca.ctr$Organoids <- NA
pca.ctr[pca.ctr$COiMg %in% 'yes',]$Organoids <- 'COiMG'
pca.ctr[pca.ctr$COiMg %in% 'no',]$Organoids <- 'CO'
PCAplot(pca.ctr, "Organoids", Shape = "Line", PoV.df=PoV.ctr, pc.1 = 1, pc.2 = 2, colors=c('lightgrey','black'))
volcano_plot(data.frame(new_coimg), title = 'CO vs COiMg',
annotate_by=unlist(AB.genes), ymax1 = 30, ymax2 = 31, xmax1 = -5, xmax2 = 8) +
scale_x_continuous(breaks = c(-5,-3,-1, 0,1,3,5),
labels = c("-5",
'-3',
"-1",
"0",
"1",
'3',
"5"))
meta.ctr$Organoids <- NA
meta.ctr[meta.ctr$COiMg %in% 'yes',]$Organoids <- 'COiMg'
meta.ctr[meta.ctr$COiMg %in% 'no',]$Organoids <- 'CO'
boxplot1 <- data.frame('Sample'=rownames(meta.ctr),'Organoids'=meta.ctr$Organoids,t(batch.ctr[rownames(batch.ctr) %in% c('C3',"C1QA","CX3CR1","TLR4"),]))
boxplot2 <- data.frame('Sample'=rownames(meta.ctr),'Organoids'=meta.ctr$Organoids,t(batch.ctr[rownames(batch.ctr) %in% c("CD68","TREM2",'TYROBP',"SYK"),]))
boxplot3 <- data.frame('Sample'=rownames(meta.ctr),'Organoids'=meta.ctr$Organoids,t(batch.ctr[rownames(batch.ctr) %in% c("SPP1","CSF1R","IRF8"),]))
# Reshape the data using gather
expression_1 <- gather(boxplot1, key = "Gene", value = "Expression", -Sample, -Organoids)
expression_2 <- gather(boxplot2, key = "Gene", value = "Expression", -Sample, -Organoids)
expression_3 <- gather(boxplot3, key = "Gene", value = "Expression", -Sample, -Organoids)
ggplot(expression_1,aes(x = Gene, y = Expression, fill = Organoids)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.3, alpha = 0.8, outlier.shape = NA) +
geom_point(aes(fill = Organoids), size = 3, shape = 21, position = position_jitterdodge(dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0.01)) +
theme_minimal() +
scale_fill_manual(values=c("white","black")) +
labs(y = "Standardized expression")
ggplot(expression_2,aes(x = Gene, y = Expression, fill = Organoids)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.3, alpha = 0.8, outlier.shape = NA) +
geom_point(aes(fill = Organoids), size = 3, shape = 21, position = position_jitterdodge(dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0.01)) +
theme_minimal() +
scale_fill_manual(values=c("white","black")) +
labs(y = "Standardized expression")
ggplot(expression_3,aes(x = Gene, y = Expression, fill = Organoids)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.3, alpha = 0.8, outlier.shape = NA) +
geom_point(aes(fill = Organoids), size = 3, shape = 21, position = position_jitterdodge(dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0.01)) +
theme_minimal() +
scale_fill_manual(values=c("white","black")) +
labs(y = "Standardized expression")
ra.ctr <- HeatmapAnnotation(
Organoids = meta.ctr$Organoids,
col = list(
Organoids = c("COiMg"='black','CO'='white')),
show_annotation_name = T,
show_legend = T)
ComplexHeatmap::Heatmap(t(hm_counts.ctr),
cluster_rows = T,
cluster_columns = T,
show_row_dend = T,
show_column_dend = T,
show_row_names = F,
show_column_names = F,
top_annotation = ra.ctr,
right_annotation = rowAnnotation(
text = anno_text(colnames(hm_counts.ctr), rot = 0, just = "left",gp=gpar(fontsize=8))),
col = colorRampPalette(rev(RColorBrewer::brewer.pal(9,"RdBu")))(10),
name = "Z-score expression")
## Enrichment heatmap (gene sets of interest)
ComplexHeatmap::Heatmap(DEG.enrich_coimg[!rownames(DEG.enrich_coimg) %in% 'Lipid metabolism',],
cluster_rows = F,
cluster_columns = F,
show_row_dend = F,
show_column_dend = F,
show_row_names = T,
col = colorRampPalette(RColorBrewer::brewer.pal(9,"Blues"))(30),
name = "log2(q)",
)
dotplot(new_coimg.upregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")
dotplot(new_coimg.downregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")
ctr_CO <- batch.rem3_mod1[,colnames(batch.rem3_mod1) %in% rownames(meta[meta$COiMg %in% "no" & meta$condition %in% "IFNg",])]
ctr_COiMG <- batch.rem3_mod1[,colnames(batch.rem3_mod1) %in% rownames(meta[meta$COiMg %in% "yes" & meta$condition %in% "IFNg",])]
cor(ctr_CO,ctr_COiMG)
## M13 M14 M15 M21 M27 M33 F9
## M16 0.9888853 0.9750494 0.9854099 0.9860980 0.9844882 0.9665080 0.9772695
## M17 0.9882932 0.9689689 0.9821487 0.9803589 0.9830655 0.9582293 0.9703453
## M18 0.9853977 0.9796923 0.9773084 0.9801409 0.9848437 0.9628487 0.9773172
## M24 0.9893057 0.9799679 0.9870001 0.9855227 0.9831987 0.9708646 0.9793361
## M30 0.9839439 0.9849775 0.9782459 0.9812581 0.9760353 0.9881220 0.9857833
## M36 0.9808666 0.9739084 0.9819043 0.9848451 0.9726188 0.9723869 0.9735683
## F15 0.9896431 0.9848594 0.9858859 0.9880105 0.9875725 0.9839841 0.9898616
## F16 0.9736695 0.9781072 0.9730415 0.9772165 0.9707696 0.9863550 0.9842931
## F17 0.9837642 0.9760274 0.9815035 0.9841363 0.9823853 0.9590486 0.9744931
## F10
## M16 0.9820133
## M17 0.9788688
## M18 0.9814824
## M24 0.9853527
## M30 0.9823925
## M36 0.9794674
## F15 0.9870064
## F16 0.9753547
## F17 0.9794916
df1 <- data.frame('CO'=rowMeans(ctr_CO))
df2 <- data.frame('COiMg'=rowMeans(ctr_COiMG))
df.plot <- cbind(df1,df2)
quantile(as.matrix(df1-df2))
## 0% 25% 50% 75% 100%
## -5.52300947 -0.16864063 0.01132637 0.16271150 1.86109744
rownames(df.plot)[df1-df2 >= 1 | df1-df2 <= -4]
## [1] "DLX6" "NOS2" "TFAP2B" "MGST1" "PAX7" "KITLG" "ASIC4"
## [8] "NAALAD2" "MOXD1" "LRP2" "SLC32A1" "TOX3" "WNT3" "FZD10"
## [15] "SPP1" "ONECUT2" "PAX3" "GAD2" "ASCL1" "ZFHX3" "GREB1L"
## [22] "C1QL2" "DLX1" "SFRP2" "TENM2" "CDH8" "POU4F1" "ZIC1"
## [29] "ROBO3" "WNT7A" "ZIC3" "H4C8" "VCAM1" "NDST3" "CRABP1"
## [36] "PTF1A" "GBX2" "SHOX2" "IRX1" "IRX2" "HSPA6" "LRRN3"
## [43] "WFIKKN2" "ZIC4" "PRIMA1" "IRX5" "OLIG3" "IRX3" "GSX2"
## [50] "FIGN" "BGN" "SOX1" "POU3F2" "PCDH18" "GREB1" "POU3F4"
## [57] "HES5" "TOX" "HSPA1B" "HSPA1A" "VGLL3" "TRIM71" "UBD"
## [64] "SP9" "OR2I1P" "H4C14" "H2BC8"
Gene <- ifelse(rownames(df.plot) %in% rownames(df.plot)[df1-df2 >= 1 | df1-df2 <= -4], rownames(df.plot), "")
ggplot(df.plot, aes(x=CO,y=COiMg))+
geom_point() +
labs(title = "Expression overlap CO vs COiMg under IFNy stimulation",
x = "Standardized CO counts",
y = "Standardized COiMg counts") +
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue", se=TRUE) +
geom_point(shape=18, color="grey")+
stat_cor(method = "pearson", label.x = 0, label.y = 20)+
geom_text_repel(data = df.plot, aes(x = CO, y = COiMg, label = Gene), vjust = 3, size = 3) +
theme_minimal()
pca.ifny[pca.ifny$Line %in% 'MSN38',]$Line <- 'MSN-38'
pca.ifny[pca.ifny$Line %in% 'MSN9',]$Line <- 'MSN-9'
pca.ifny[pca.ifny$Line %in% 'WTC11',]$Line <- 'WTC-11'
PCAplot(pca.ifny, "condition", Shape = "Line", PoV.df=PoV.ifny, pc.1 = 1, pc.2 = 2, colors = c('lightgrey','#98FB98')) #M30 & M33 are outleirs, let's remove them for now
new_IFNy$symbol <- rownames(new_IFNy)
top30.new_IFNy <- c(rownames(new_IFNy[order(new_IFNy$log2FoldChange),][new_IFNy[order(new_IFNy$log2FoldChange),]$padj < 0.05,][1:15,]),
rownames(new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),][new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),]$padj < 0.05,][1:15,]))
volcano_plot(data.frame(new_IFNy), title = 'ctr vs IFNy',
annotate_by=top30.new_IFNy, ymax1 = 60, ymax2 = 61, xmax1 = -5, xmax2 = 10) +
scale_x_continuous(breaks = c(-5,-3,-1, 0,1,3,5),
labels = c("-5",
'-3',
"-1",
"0",
"1",
'3',
"5"))
meta.CO[meta.CO$Line %in% 'MSN38',]$Line <- 'MSN-38'
meta.CO[meta.CO$Line %in% 'MSN9',]$Line <- 'MSN-9'
meta.CO[meta.CO$Line %in% 'WTC11',]$Line <- 'WTC-11'
boxplot.g_ifny_down <- boxplot.g_ifny[boxplot.g_ifny$Sample %in% rownames(meta.CO[!meta.CO$condition %in% 'LPS',]),]
boxplot.g_ifny_down <- boxplot.g_ifny_down[,colnames(boxplot.g_ifny_down) %in% c("CLDN5","IGF1","Stimulation","Sample")]
boxplot.g_ifny_down <- gather(boxplot.g_ifny_down, key = "Gene", value = "Expression", -Sample, -Stimulation)
boxplot.g_ifny_up <- boxplot.g_ifny[boxplot.g_ifny$Sample %in% rownames(meta.CO[!meta.CO$condition %in% 'LPS',]),]
boxplot.g_ifny_up <- boxplot.g_ifny_up[,!colnames(boxplot.g_ifny_up) %in% c("CLDN5","IGF1")]
boxplot.g_ifny_up <- gather(boxplot.g_ifny_up, key = "Gene", value = "Expression", -Sample, -Stimulation)
ggplot(boxplot.g_ifny_down,aes(x = Gene, y = Expression, fill = Stimulation)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.3, alpha = 0.8, outlier.shape = NA) +
geom_point(aes(fill = Stimulation), size = 3, shape = 21, position = position_jitterdodge(dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0.01)) +
theme_minimal() +
scale_fill_manual(values=c("white","black")) +
labs(y = "Standardized expression")
ggplot(boxplot.g_ifny_up,aes(x = Gene, y = Expression, fill = Stimulation)) +
geom_boxplot(position = position_dodge(width = 0.8), width = 0.3, alpha = 0.8, outlier.shape = NA) +
geom_point(aes(fill = Stimulation), size = 3, shape = 21, position = position_jitterdodge(dodge.width = 0.8, jitter.height = 0.2, jitter.width = 0.01)) +
theme_minimal() +
scale_fill_manual(values=c("white","black")) +
labs(y = "Standardized expression")
ra.ifny <- HeatmapAnnotation(
Condition = meta.CO[!meta.CO$condition %in% 'LPS',]$condition,
col = list(
Condition = c("IFNg"='#98FB98','ctrl'='white')),
show_annotation_name = T,
show_legend = T)
top50.new_IFNy <- c(rownames(new_IFNy[order(new_IFNy$log2FoldChange),][new_IFNy[order(new_IFNy$log2FoldChange),]$padj < 0.05,][1:25,]),
rownames(new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),][new_IFNy[order(new_IFNy$log2FoldChange, decreasing = T),]$padj < 0.05,][1:25,]))
hm_counts.ifny <- t(scale(t(batch.ifny[rownames(batch.ifny) %in% top50.new_IFNy,])))
ComplexHeatmap::Heatmap(hm_counts.ifny,
cluster_rows = T,
cluster_columns = T,
show_row_dend = T,
show_column_dend = T,
show_row_names = F,
show_column_names = F,
top_annotation = ra.ifny,
right_annotation = rowAnnotation(
text = anno_text(rownames(hm_counts.ifny), rot = 0, just = "left",gp=gpar(fontsize=8))),
col = colorRampPalette(rev(RColorBrewer::brewer.pal(9,"RdBu")))(10),
name = "Z-score expression")
ComplexHeatmap::Heatmap(DEG.enrich_ifny[!rownames(DEG.enrich_ifny) %in% 'Lipid metabolism',],
cluster_rows = F,
cluster_columns = F,
show_row_dend = F,
show_column_dend = F,
show_row_names = T,
col = colorRampPalette(RColorBrewer::brewer.pal(9,"Blues"))(30),
name = "log2(q)",
)
cgenes <- readxl::read_xlsx('/Users/kubler01/Documents/R projects/gene lists/ndd/Gene lists for organoid paper.xlsx', col_names = T)
m <- data.frame(t(toupper(na.omit(cgenes$`ADHD GWAS associated genes through FUMA`))[toupper(na.omit(cgenes$`ADHD GWAS associated genes through FUMA`)) %in% rownames(batch.ifny)]), check.names = F)
for(i in names(cgenes)[-1]){
k <- na.omit(cgenes[i])
s <- k[as.matrix(k)[,1] %in% rownames(batch.ifny),]
m <- rbind.fill(data.frame(m),data.frame(t(s)))
}
rownames(m) <- names(cgenes)
m <- t(m)
#extract DEGs
DEGs.ifny <- list("Up-regulated"=na.omit(all_res$IFNg$DEGs[,1]),"Down-regulated"=na.omit(all_res$IFNg$DEGs[,2]))
#gene set enrichment analysis:
ndd.enrich <- matrix(nrow=2*length(colnames(m)), ncol=6)
ndd.rep <- NULL
for (i in colnames(m))
ndd.rep <- c(ndd.rep, paste(i, "_up", sep=""), paste(i, "_down", sep=""))
rownames(ndd.enrich) <- ndd.rep
colnames(ndd.enrich) <- colnames(GSEA.byMod(mod.gl=DEGs.ifny, m[,1], universe=35324))
for (x in colnames(m)){
ndd.enrich[gsub("*_up","",gsub("*_down","",rownames(ndd.enrich))) %in% x,] <- as.matrix(GSEA.byMod(mod.gl=DEGs.ifny, na.omit(as.matrix(m)[,x]), universe=na.omit(rownames(res))))
}
ndd.enrich <- data.frame(ndd.enrich)
ndd.enrich$log.q <- -log2(ndd.enrich[,6])
#DEG.enrich.res[is.infinite(DEG.enrich.res$log.q),]$log.q <- 1
ndd.enrich.ress <- cbind("Upregulated"=ndd.enrich[c(seq(1, length(rownames(ndd.enrich))-1, by = 2)),][,7],
"Downregulated"=ndd.enrich[c(seq(2, length(rownames(ndd.enrich)), by = 2)),][,7])
rownames(ndd.enrich.ress) <- colnames(m)
ComplexHeatmap::Heatmap(ndd.enrich.ress,
cluster_rows = F,
cluster_columns = F,
show_row_dend = F,
show_column_dend = F,
show_row_names = T,
col = colorRampPalette(RColorBrewer::brewer.pal(9,"Blues"))(30),
name = "log2(q)",
)
dotplot(new_IFNy.upregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")
dotplot(new_IFNy.downregGO, split="ONTOLOGY", showCategory=10) + facet_grid(ONTOLOGY~., scale="free")
#==========iMG - COiMG correlation===============+#
#integrate iMG with COiMG
load('/Users/kubler01/Documents/R projects/bulk-org/data/Psamples_gene_matrix.RData')
genes_counts
meta_p <- data.frame(readxl::read_xlsx('/Users/kubler01/Documents/R projects/bulk-org/meta_P.xlsx'))
counts_p <- genes_counts[,colnames(genes_counts) %in% meta_p$ID]
counts_P.filt <- counts_p[rownames(counts_p) %in% counts_protcod2,]
rownames(counts_P.filt) <- make.unique(protcod_genes_reduced[,2])
#filter genes on expression
gene_names <- rownames(counts_P.filt)
cpm = edgeR::cpm(counts_P.filt)
median_genes_cpm <- enframe(rowMedians(as.matrix(counts_P.filt)), name = "Gene", value = "median_cpm")
median_genes_cpm <- cbind(median_genes_cpm, gene_names)
keep.exp <- dplyr::filter(median_genes_cpm, median_cpm > 1)
keep.exp <- keep.exp$gene_names
counts_P.filt2 <- counts_P.filt[keep.exp,]
dds.P <- DESeqDataSetFromMatrix(countData = round(counts_P.filt2),
colData = meta_p,
design = ~ RIN + `X260.280` + `X260.230` + type) # RIN + Line + Age + `260/230` + `260/280` +
vsd.p <- vst(dds.P)
transformed.p <- data.frame(assay(vsd.p), check.names=F)
batch.p <- removeBatchEffect(transformed.p,
covariates=as.matrix(cbind(
meta_p$`X260.280`,
meta_p$`X260.230`,
meta_p$RIN)),
design=model.matrix(~ meta_p$type))
iMG <- batch.p[,colnames(batch.p) %in% meta_p[meta_p$type %in% 'iMG',]$ID]
ocMG <- batch.p[,colnames(batch.p) %in% meta_p[meta_p$type %in% 'ocMG',]$ID]
cor(iMG,ocMG)
## P12 P13
## P10 1.0000000 0.9999866
## P11 1.0000000 0.9999866
## P9 0.9999866 1.0000000
df1 <- data.frame('iMG'=rowMeans(iMG))
df2 <- data.frame('ocMG'=rowMeans(ocMG))
df.plot <- cbind(df1,df2)
quantile(as.matrix(df1-df2))
## 0% 25% 50% 75% 100%
## -1.517314803 -0.060528021 0.007527633 0.078884006 1.029107876
rownames(df.plot)[df1-df2 >= 0.5 | df1-df2 <= -1]
## [1] "SCIN" "NR1H3" "APBA2" "CDH1" "RAI14" "ELN"
## [7] "FAR2" "NAV3" "HES2" "NUAK1" "FSCN1" "SLC1A3"
## [13] "EPB41L2" "OAS1" "ITGA6" "BLNK" "PLTP" "MXRA5"
## [19] "PTGDS" "ADGRD1" "KIF20A" "STEAP3" "OTOF" "CD207"
## [25] "GBP1" "SPP1" "LTBP2" "KCNJ5" "KIAA1217" "KCNJ2"
## [31] "AHNAK" "MCF2L" "CFP" "SLC44A2" "APOC1" "COL5A1"
## [37] "PPARG" "CLEC10A" "RIN2" "ACY3" "CHIT1" "NTS"
## [43] "ETS1" "ADAM19" "ITGA7" "DYSF" "GYPC" "ANGPTL2"
## [49] "IRF4" "IFI44L" "CHRNA1" "RGS16" "GRIP2" "PTPRG"
## [55] "MKI67" "HS3ST3A1" "MYO1E" "JAML" "PLPP3" "KIF26B"
## [61] "FYCO1" "MELTF" "IFI27" "HTRA1" "CX3CR1" "SOCS6"
## [67] "SH3RF3" "HSPB7" "TP53I11" "LRRN1" "ALS2CL" "PCED1B"
## [73] "TSPAN10" "CADM1" "KCNQ3" "INKA1" "POU3F1" "GLDN"
## [79] "RYR1" "GAL3ST4" "PDGFA" "ADGRG1" "LILRA4" "SIGLEC12"
Gene <- ifelse(rownames(df.plot) %in% rownames(df.plot)[df1-df2 >= 0.5 | df1-df2 <= -1], rownames(df.plot), "")
ggplot(df.plot, aes(x=iMG,y=ocMG))+
geom_point() +
labs(title = "Expression overlap iMG and ocMG",
x = "Standardized iMG counts",
y = "Standardized ocMG counts") +
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue", se=TRUE) +
geom_point(shape=18, color="grey")+
stat_cor(method = "pearson", label.x = 0, label.y = 20)+
geom_text_repel(data = df.plot, aes(x = iMG, y = ocMG, label = Gene), vjust = 3, size = 3) +
theme_minimal()
load("/Users/kubler01/Documents/R projects/bulk-org/01232024_figures AB_ifny in coimg.RData")
volcano_plot(data.frame(new_coimg_ifny), title = 'ctr vs IFNy in COiMg',
annotate_by=top30.new_coimg_ifny, ymax1 = 80, ymax2 = 81, xmax1 = -5, xmax2 = 15) +
scale_x_continuous(breaks = c(-5,-3,-1, 0,1,3,5),
labels = c("-5",
'-3',
"-1",
"0",
"1",
'3',
"5"))
lfc.plot <- data.frame('Gene'=new_IFNy$symbol,'CO'=new_IFNy$log2FoldChange,'COiMg'=new_coimg_ifny[rownames(new_coimg_ifny) %in% rownames(new_IFNy),]$log2FoldChange)
rownames(lfc.plot) <- new_IFNy$symbol
Gene <- ifelse(rownames(lfc.plot) %in% rownames(lfc.plot)[lfc.plot$CO-lfc.plot$COiMG >= 4 | lfc.plot$CO-lfc.plot$COiMG <= -4], rownames(lfc.plot), "")
ggplot(lfc.plot, aes(x=CO,y=COiMg))+
geom_point() +
labs(title = "lFC correlation in CO & COiMg under IFNy stimulation",
x = "lFC IFNy in CO",
y = "lFC IFNy in COiMg") +
geom_smooth(method=lm, linetype="dashed",
color="darkred", fill="blue", se=TRUE) +
geom_point(shape=18, color="grey")+
stat_cor(method = "pearson", label.x = 0, label.y = 20)+
geom_text_repel(data = lfc.plot, aes(x = CO, y = COiMg, label = Gene), vjust = 3, size = 3) +
theme_minimal()